Setup

knitr::opts_chunk$set(echo=TRUE)
knitr::opts_chunk$set(warning=FALSE, message=FALSE)

# Load libraries
library(RColorBrewer)
library(dittoSeq)
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Tonsil_th152/training/subset"

# Set path to masks
masks.path <- "/mnt/bb_dqbm_volume/data/Tonsil_th152/steinbock/masks_deepcell"
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")

Read Inputs

First, we read in the results from CISI pilot run for the chosen immune markers (CD15, CD20, CD3, CD38, CD4, CD68, CD8a, ICOS, Ki-67, MPO, panCK, SMA, CD303, FOXP3, GranzymeB). The pilot ran was done using no normalization and with k=1.

## Read results
# Read in all results for tonsil into one dataframe
results.files <- list.files(analysis.path, 'simulation_results.txt',
                            full.names=TRUE, recursive=TRUE)
results.files <- results.files[grepl("pilot_immune_channels", results.files)]

results.df <- lapply(results.files, read_results, type="res", use_voi=FALSE) %>% 
  bind_rows() 


## Read X_test and X_simulated
# Specify X_test and X_simulated files to be read in
X.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)
X.files <- X.files[grepl("pilot_immune_channels", X.files)]

# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. which dataset is used in training and testing and if it is the ground truth or simulated X)
X.list <- lapply(X.files, read_results, type="x", use_voi=FALSE)
X.list <- lapply(X.list, function(sce.temp){
  assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
  
  sce.temp
})



## Read U
u.files <- list.files(analysis.path, "gene_modules.csv",
                      full.names=TRUE, recursive=TRUE)
# TODO: change this line if more files come along and need to exclude
u.files <- u.files[grepl("pilot_immune_channels", u.files)]
u <- lapply(u.files, read_U, type="training") %>% 
  bind_rows() %>%
  dplyr::rename(dataset=rep) %>%
  mutate(k="1")


## Read A
a.files <- list.files(analysis.path, "version_*",
                      full.names=TRUE, recursive=TRUE)
# TODO: change this line if more files come along and need to exclude
a.files <- a.files[grepl("pilot_immune_channels", u.files)]
a <- lapply(a.files, read_A, use_voi=FALSE) %>% 
  bind_rows() %>%
  mutate(k="1")


# Read in masks for tonsil data
masks <- loadImages(masks.path, as.is=TRUE)
mcols(masks) <- DataFrame(sample_id=names(masks))

Results

Barplot of Results (k=1)

For each different measurement used to test training results, a barplot is shown comparing CISI results.

# For each different measurement of training results, plot barplot 

# Melt dataframe for plotting
data_temp <- results.df %>%
  dplyr::select(-c("dataset", "training", "datasize", "version", "Best crossvalidation fold")) %>%
  pivot_longer(!c("simulation"), names_to="measure", values_to="value")

# Create barplot
results.barplot <- plot_cisi_results(data_temp, "measure", "value", "measure")
print(results.barplot)

Per protein results

Correlations per Protein (k=1, arcsinh)

Correlation between ground truth and decomposed results per protein for
asinh transformed counts.

aoi <- "exprs"
# Calculate correlations between ground truth and simulated data for each protein
X.cor <- lapply(X.list, function(sce){
  counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
    mutate(protein=rownames(.)) %>%
    melt() %>%
    dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
                  cell=variable) %>%
    mutate(dataset=paste(metadata(sce)$training, metadata(sce)$dataset, sep="_"))
  
}) %>% bind_rows() %>%
  group_by(protein, cell, dataset) %>%
  summarise_all(na.omit) %>%
  group_by(dataset, protein) %>%
  mutate(correlation=cor(ground_truth, simulated))


# Plot correlations for k=1 for each test/training dataset combination
proteinCorr <- plot_protein_cor(X.cor) + ylim(0, 1)
print(proteinCorr)

Image results

FOXP3 of Test Image (k=1, arcsinh)

Expression values for cells in test image for worst performing protein FOXP3.

# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "FOXP3"
img.foxp3 <- plot_cells(X.list, masks, poi)
# Plot decomposed vs true results for test roi (002)
img.foxp3 <- plot_grid(plotlist=append(img.foxp3[grepl("20220520_TsH_th152_cisi1_002",
                                                       names(img.foxp3))],
                                       img.foxp3[grepl("legend",
                                                       names(img.foxp3))]), ncol=2, 
                       labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
                       label_size=15, hjust=c(-2, -1.5), 
                       vjust=1, scale=0.9)
img.foxp3

CD303 of Test Image (k=1, arcsinh)

Expression values for cells in test image for second worst performing protein CD303.

# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi2 <- "CD303"
img.cd303 <- plot_cells(X.list, masks, poi2)
# Plot decomposed vs true results for test roi (002)
img.cd303 <- plot_grid(plotlist=append(img.cd303[grepl("20220520_TsH_th152_cisi1_002",
                                                       names(img.cd303))],
                                       img.cd303[grepl("legend",
                                                       names(img.cd303))]), ncol=2, 
                       labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
                       label_size=15, hjust=c(-2, -1.5), 
                       vjust=1, scale=0.9)
img.cd303

CD303 protein distribution (k=1, arcsinh)

# Have a look at dittoRidgePlots of a bad performing protein
X.list <- lapply(X.list, function(sce.temp) {
  colData(sce.temp)$dummy <- "1"
  
  sce.temp
})

plot.list <- NULL
for(sce in X.list){
  plot.list <- append(plot.list, list(dittoRidgePlot(sce, var="CD303", 
                                                     group.by="dummy", assay="exprs") +
                                        ggtitle(metadata(sce)$ground_truth) +
                                        coord_cartesian(xlim=c(0, 6.2), expand=TRUE)))
}
print(plot_grid(plotlist=plot.list, ncol=1))

Matrix designs

U

u.plot <- plot_single_U(u %>%
                          dplyr::select(module, membership, protein), 
                        paste0("U", length(unique(u$module))))

print(u.plot)

A

a.plot <- plot_single_A(a %>%
                          dplyr::select(-c("dataset", "training", "k")), "A")

print(a.plot)